
> ## ---- Setpup ----
> ## Folders
> dir.create("est/correlates_of_rent")

> dir.create("est/correlates_of_rent/household")

> dir.create("est/correlates_of_rent/neighborhood")

> ## lmer estimation algorithm control
> strict_tol <- lmerControl(optCtrl = list(xtol_abs = 1e-8, ftol_abs = 1e-8))

> ## ---- Data ----
> ## SOEP
> load("dat/proc-data/soep_imp.RData")

> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     dplyr::group_by(hh_id, year) %>%
+     dplyr::mutate(weight = mean(weight)) %>%
+     dplyr::slice(1) %>%
+     dplyr::ungroup() %>%
+     dplyr::select(
+       hh_id,
+       year,
+       year_fac,
+       plz,
+       kkz,
+       hh_comp,
+       mover,
+       yrs_since_movein,
+       owner,
+       cold_rent_sqm,
+       cold_rent_load,
+       log_hinc_eq,
+       home_size,
+       hh_mmb,
+       rent,
+       cmr_arm,
+       rent_pchg_2005,
+       cmr_arm_pchg_2005,
+       imputed_rent,
+       asset_re_hom_p,
+       asset_re_hom_m,
+       asset_re_hom_t,
+       asset_ov_ttl_t,
+       weight
+     )
+ }

> for (m in seq_along(soep_imp$imputations)) {
+   soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
+     group_by(hh_id) %>%
+     mutate_at(
+       .vars = vars(rent, cmr_arm, rent_pchg_2005, cmr_arm_pchg_2005),
+       .funs = list(
+         umn = ~ mean(., na.rm = T),
+         cwu = ~ . - mean(., na.rm = T)
+       )
+     ) %>%
+     ungroup()
+ }

> # Microm (neighborhood-level)
> load("dat/proc-data/plz5_f_und_b.RData")

> microm_nbh <- read.dta("dat/proc-data/microm_2005_2018_nbh.dta") %>%
+   mutate(plz = as.character(plz)) %>%
+   mutate(plz = ifelse(nchar(plz) == 4, paste0("0", plz), plz)) %>%
+   left_join(
+     plz5_f_und_b %>%
+       as.data.frame() %>%
+       dplyr::select(-AGS05max, -geometry) %>%
+       group_by(plz) %>%
+       mutate_at(
+         .vars = vars(rent, cmr_arm),
+         .funs = list(`2005` = ~ ifelse(length(.[year == 2005]) == 0L, NA, .[year == 2005]))
+       ) %>%
+       mutate(
+         rent_pchg_2005 = rent / rent_2005 - 1,
+         rent_achg_2005 = rent - rent_2005,
+         cmr_arm_pchg_2005 = cmr_arm / cmr_arm_2005 - 1,
+         cmr_arm_achg_2005 = cmr_arm - cmr_arm_2005
+       ) %>%
+       ungroup(),
+     by = c("plz", "year")
+   ) %>%
+   mutate(
+     nbh_mmo_k_fluktu = as.numeric(droplevels(nbh_mmo_k_fluktu)) - 6.0,
+     nbh_mmo_k_volumen = as.numeric(droplevels(nbh_mmo_k_volumen)) - 5.0,
+     nbh_mmo_k_saldo = as.numeric(droplevels(nbh_mmo_k_saldo)) - 4.0
+   ) %>%
+   mutate_if(is.factor, as.character) %>%
+   mutate(
+     gtyp5 = case_when(
+       gtyp %in% c("[1] Kernstaedte>==gr.Verdraum") ~ "urban > 500k",
+       gtyp %in% c(
+         "[2] Kernstaedte<==gr.Verdraum",
+         "[9] Kernstaedte<==Verdansatz"
+       ) ~ "urban < 500k",
+       gtyp %in% c(
+         "[3] Mi-zent.=hochverd.=gr.Verdraum",
+         "[4] so.Gem.=hochverd.=gr.Verdraum",
+         "[5] Mi-zent.=verd.=gr.Verdraum",
+         "[6] so.Gem.=verd.=gr.Verdraum",
+         "[10] Mi-zent.=verd.=Verdansatz",
+         "[11] so.Gem.=verd.=Verdansatz"
+       ) ~ "suburban",
+       gtyp %in% c(
+         "[7] Mi-zent.=laendl=gr.Verdraum",
+         "[8] so.Gem.=laendl=gr.Verdraum",
+         "[12] Mi-zent.=laendl=Verdansatz",
+         "[13] so.Gem.=laendl=Verdansatz"
+       ) ~ "suburban-rural",
+       gtyp %in% c(
+         "[14] Mi-zent.=verd.=laendl.",
+         "[15] so.Gem.=verd.=laendl.",
+         "[16] Mi-zent.=laendl=laendl.",
+         "[17] so.Gem.=laendl=laendl."
+       ) ~ "remote-rural",
+       TRUE ~ NA_character_
+     )
+   ) %>%
+   group_by(plz) %>%
+   mutate_at(
+     .vars = vars(rent, cmr_arm, rent_pchg_2005, cmr_arm_pchg_2005),
+     .funs = list(
+       umn = ~ mean(., na.rm = T),
+       cwu = ~ . - mean(., na.rm = T)
+     )
+   ) %>%
+   ungroup()

> microm_nbh <- microm_nbh %>%
+   mutate(year_fac = as.factor(year))

> ## ---- Household-level Models ----
> ## Outcomes
> outcomes <- c("cold_rent_sqm", "asset_re_hom_t", "asset_ov_ttl_t")

> outcome_requires_subset <- c(
+   "cold_rent_sqm" = "owner == 0",
+   "asset_re_hom_t" = "owner == 1 & year %in% c(2007, 2012, 2017)",
+   "asset_ov_ttl_t" = NA_character_
+ )

> ## Main predictors
> predictors <- c("rent")

> ## Year fixed effects?
> year_fixed_effects <- c("year_fac +")

> ## Subsets
> subset_owner <- c("owner == 0", "owner == 1")

> ## Model formula components
> predictor_formula <-
+   "[_pred_]_cwu + [_pred_]_umn +"

> ## ---- Compile model code ----
> model_formula <-
+   'mod <- lmer(
+   [_yvar_] ~
+     [_xvars_]
+     (1 | hh_id) + (1 | plz) + (1 | kkz),
+   data = [_data_],
+   control = strict_tol,
+   weights = weight
+ )'

> model_formula_list <- list()

> counter <- 0L

> for (outcome in outcomes) {
+   for (predictor in predictors) {
+     for (year_fe in year_fixed_effects) {
+       ## Covariates
+       predictor_formula_local <-
+         gsub("\\[\\_pred\\_\\]", predictor, predictor_formula)
+       
+       xvars <- predictor_formula_local
+       
+       if (magrittr::not(is.na(year_fe))) {
+         xvars <- paste(xvars, year_fe, sep = " \n ")
+       }
+       
+       
+       if (magrittr::not(is.na(outcome_requires_subset[outcome]))) {
+         ## Counter
+         counter <- counter + 1L
+         
+         ## Data
+         data <- "soep_imp$imputations[[m]]"
+         data <- paste(data,
+                       paste0("filter(", outcome_requires_subset[outcome], ")"),
+                       sep = " %>% \n ")
+         
+         ## Combine
+         model_formula_local <-
+           gsub("\\[\\_yvar\\_\\]", outcome, model_formula)
+         model_formula_local <-
+           gsub("\\[\\_xvars\\_\\]", xvars, model_formula_local)
+         model_formula_local <-
+           gsub("\\[\\_data\\_\\]", data, model_formula_local)
+         
+         ## Return
+         model_formula_list[[counter]] <-
+           list(
+             level = "household",
+             outcome = outcome,
+             predictor = predictor,
+             owner = outcome_requires_subset[outcome],
+             year_fe = magrittr::not(is.na(year_fe)),
+             formula = model_formula_local
+           )
+       } else {
+         for (owner in subset_owner) {
+           ## Counter
+           counter <- counter + 1L
+           
+           ## Data
+           data <- "soep_imp$imputations[[m]]"
+           data <- paste(data, paste0("filter(", owner, ")"), sep = " %>% \n ")
+           
+           if (outcome == "asset_ov_ttl_t") {
+             data <- paste(data, "filter(year %in% c(2007, 2012, 2017))", sep = " %>% \n ")
+           }
+           
+           ## Combine
+           model_formula_local <-
+             gsub("\\[\\_yvar\\_\\]", outcome, model_formula)
+           model_formula_local <-
+             gsub("\\[\\_xvars\\_\\]", xvars, model_formula_local)
+           model_formula_local <-
+             gsub("\\[\\_data\\_\\]", data, model_formula_local)
+           
+           ## Return
+           model_formula_list[[counter]] <-
+             list(
+               level = "household",
+               outcome = outcome,
+               predictor = predictor,
+               owner = owner,
+               year_fe = magrittr::not(is.na(year_fe)),
+               formula = model_formula_local
+             )
+         }
+       }
+     }
+   }
+ }

> ## ---- Estimation ----
> ## Capture start time
> ptm <- proc.time()[3]

> for (analysis in seq_along(model_formula_list)) {
+   ## Estimate model in parallel
+   current_formula <- model_formula_list[[analysis]]$formula
+   
+   ## Set up cluster for parallelized computation across imputations
+   cl <- makeCluster(length(soep_imp$imputations),
+                     outfile = "est/correlates_of_rent_hh.txt",
+                     type = "FORK")
+   est <- parLapply(cl, seq_along(soep_imp$imputations), function (m) {
+     eval(parse(text = current_formula))
+   })
+   
+   ## Exit parallel computation
+   stopCluster(cl)
+   
+   ## Export output
+   # Model matrix (stacked)
+   X <- do.call(rbind, lapply(est, function(obj)
+     obj@pp$X))
+   mod_summary <- lapply(est, summary)
+   
+   # Estimates
+   data.frame(
+     model_id = analysis,
+     outcome = model_formula_list[[analysis]]$outcome,
+     predictor = model_formula_list[[analysis]]$predictor,
+     owner = model_formula_list[[analysis]]$owner,
+     year_fe = model_formula_list[[analysis]]$year_fe,
+     N_obs = nrow(est[[1]]@pp$X),
+     N_hh  = nlevels(est[[1]]@flist$hh_id),
+     N_plz = nlevels(est[[1]]@flist$plz),
+     N_kkz = nlevels(est[[1]]@flist$kkz),
+     convergence_imp1 = est[[1]]@optinfo$conv$opt,
+     convergence_imp2 = est[[2]]@optinfo$conv$opt,
+     convergence_imp3 = est[[3]]@optinfo$conv$opt,
+     convergence_imp4 = est[[4]]@optinfo$conv$opt,
+     convergence_imp5 = est[[5]]@optinfo$conv$opt,
+     sigma_res_imp1 = mod_summary[[1]]$sigma,
+     sigma_id_imp1  = attr(mod_summary[[1]]$varcor$hh_id, "stddev"),
+     sigma_plz_imp1 = attr(mod_summary[[1]]$varcor$plz, "stddev"),
+     sigma_kkz_imp1 = attr(mod_summary[[1]]$varcor$kkz, "stddev"),
+     sigma_res_imp2 = mod_summary[[2]]$sigma,
+     sigma_id_imp2  = attr(mod_summary[[2]]$varcor$hh_id, "stddev"),
+     sigma_plz_imp2 = attr(mod_summary[[2]]$varcor$plz, "stddev"),
+     sigma_kkz_imp2 = attr(mod_summary[[2]]$varcor$kkz, "stddev"),
+     sigma_res_imp3 = mod_summary[[3]]$sigma,
+     sigma_id_imp3  = attr(mod_summary[[3]]$varcor$hh_id, "stddev"),
+     sigma_plz_imp3 = attr(mod_summary[[3]]$varcor$plz, "stddev"),
+     sigma_kkz_imp3 = attr(mod_summary[[3]]$varcor$kkz, "stddev"),
+     sigma_res_imp4 = mod_summary[[4]]$sigma,
+     sigma_id_imp4  = attr(mod_summary[[4]]$varcor$hh_id, "stddev"),
+     sigma_plz_imp4 = attr(mod_summary[[4]]$varcor$plz, "stddev"),
+     sigma_kkz_imp4 = attr(mod_summary[[4]]$varcor$kkz, "stddev"),
+     sigma_res_imp5 = mod_summary[[5]]$sigma,
+     sigma_id_imp5  = attr(mod_summary[[5]]$varcor$hh_id, "stddev"),
+     sigma_plz_imp5 = attr(mod_summary[[5]]$varcor$plz, "stddev"),
+     sigma_kkz_imp5 = attr(mod_summary[[5]]$varcor$kkz, "stddev")
+   ) %>%
+     write.csv(paste0(
+       "est/correlates_of_rent/household/",
+       "model_info",
+       analysis,
+       ".csv"
+     ))
+   
+   model_coefficients <- cbind(
+     data.frame(
+       model_id = analysis,
+       x_names = names(fixef(est[[1]])),
+       x_means = apply(X, 2, mean),
+       x_min = apply(X, 2, min),
+       x_max = apply(X, 2, max),
+       b_imp1 = fixef(est[[1]]),
+       b_imp2 = fixef(est[[2]]),
+       b_imp3 = fixef(est[[3]]),
+       b_imp4 = fixef(est[[4]]),
+       b_imp5 = fixef(est[[5]])
+     ),
+     as.data.frame(as.matrix(vcov(est[[1]]))) %>%
+       rename_all(function (x)
+         paste0(x, "_imp1")),
+     as.data.frame(as.matrix(vcov(est[[2]]))) %>%
+       rename_all(function (x)
+         paste0(x, "_imp2")),
+     as.data.frame(as.matrix(vcov(est[[3]]))) %>%
+       rename_all(function (x)
+         paste0(x, "_imp3")),
+     as.data.frame(as.matrix(vcov(est[[4]]))) %>%
+       rename_all(function (x)
+         paste0(x, "_imp4")),
+     as.data.frame(as.matrix(vcov(est[[5]]))) %>%
+       rename_all(function (x)
+         paste0(x, "_imp5"))
+   ) %>%
+     write.csv(
+       paste0(
+         "est/correlates_of_rent/household/",
+         "model_coefficients",
+         analysis,
+         ".csv"
+       )
+     )
+   
+   ## Clear memory
+   rm(est, X, mod_summary)
+   gc(reset = TRUE)
+   
+   ## Progress
+   print(paste0(
+     "Analysis: ",
+     analysis,
+     ". Time elapsed: ",
+     round((proc.time()[3] - ptm) / 60, 2),
+     " minutes."
+   ))
+ }
[1] "Analysis: 1. Time elapsed: 0.17 minutes."
[1] "Analysis: 2. Time elapsed: 0.22 minutes."
[1] "Analysis: 3. Time elapsed: 0.28 minutes."
[1] "Analysis: 4. Time elapsed: 0.33 minutes."

> ## ---- Collect output ----
> model_files <- list.files("est/correlates_of_rent/household")

> model_info_files <- model_files[startsWith(model_files, "model_info")]

> model_coefficients_files <-
+   model_files[startsWith(model_files, "model_coefficients")]

> ## Model info
> model_info <- data.frame()

> for (file in model_info_files) {
+   model_info <- model_info %>%
+     bind_rows(read.csv(paste0(
+       "est/correlates_of_rent/household/", file
+     )))
+ }

> model_info <- model_info %>%
+   dplyr::select(-X) %>%
+   arrange(model_id)

> write.csv(model_info,
+           file = "est/info_correlates_of_rent_hh.csv",
+           row.names = FALSE)

> ## Model coefficients
> model_coefficients <- data.frame()

> for (file in model_coefficients_files) {
+   model_coefficients <- model_coefficients %>%
+     bind_rows(read.csv(paste0(
+       "est/correlates_of_rent/household/", file
+     )))
+ }

> model_coefficients <- model_coefficients %>%
+   dplyr::select(-X) %>%
+   arrange(model_id)

> write.csv(model_coefficients,
+           file = "est/coef_correlates_of_rent_hh.csv",
+           row.names = FALSE)

> save(model_info, model_coefficients, file = "est/correlates_of_rent_hh.rdata")

> ## ---- Neighborhood-level Models ----
> ## Outcomes
> outcomes <- c(
+   "nbh_mlp_k_status",
+   "nbh_kkr_w_proeinw"
+ )

> ## Main predictors
> predictors <- c("rent")

> ## Year fixed effects?
> year_fixed_effects <- c("year_fac +")

> ## Model formula components
> predictor_formula <-
+   "[_pred_]_cwu + [_pred_]_umn +"

> ## ---- Compile model code ----
> model_formula <-
+   'mod <- lmer(
+   [_yvar_] ~
+     [_xvars_]
+     (1 | plz) + (1 | kkz),
+   data = [_data_],
+   control = strict_tol
+ )'

> model_formula_list <- list()

> counter <- 0L

> for (outcome in outcomes) {
+   for (predictor in predictors) {
+     for (year_fe in year_fixed_effects) {
+       ## Covariates
+       predictor_formula_local <-
+         gsub("\\[\\_pred\\_\\]", predictor, predictor_formula)
+       
+       xvars <- predictor_formula_local
+       
+       if (magrittr::not(is.na(year_fe))) {
+         xvars <- paste(xvars, year_fe, sep = " \n ")
+       }
+       
+       ## Counter
+       counter <- counter + 1L
+       
+       ## Data
+       data <- "microm_nbh"
+       
+       ## Combine
+       model_formula_local <-
+         gsub("\\[\\_yvar\\_\\]", outcome, model_formula)
+       model_formula_local <-
+         gsub("\\[\\_xvars\\_\\]", xvars, model_formula_local)
+       model_formula_local <-
+         gsub("\\[\\_data\\_\\]", data, model_formula_local)
+       
+       ## Return
+       model_formula_list[[counter]] <-
+         list(
+           level = "neighborhood",
+           outcome = outcome,
+           predictor = predictor,
+           year_fe = magrittr::not(is.na(year_fe)),
+           formula = model_formula_local
+         )
+     }
+   }
+ }

> ## ---- Estimation ----
> ## Capture start time
> ptm <- proc.time()[3]

> for (analysis in seq_along(model_formula_list)) {
+   ## Estimate model
+   eval(parse(text = model_formula_list[[analysis]]$formula))
+   
+   ## Export output
+   # Model matrix (stacked)
+   X <- mod@pp$X
+   mod_summary <- summary(mod)
+   
+   # Estimates
+   data.frame(
+     model_id = analysis,
+     outcome = model_formula_list[[analysis]]$outcome,
+     predictor = model_formula_list[[analysis]]$predictor,
+     year_fe = model_formula_list[[analysis]]$year_fe,
+     N_pl8 = nrow(X),
+     N_plz = nlevels(mod@flist$plz),
+     N_kkz = nlevels(mod@flist$kkz),
+     convergence = mod@optinfo$conv$opt,
+     sigma_res = mod_summary$sigma,
+     sigma_plz = attr(mod_summary$varcor$plz, "stddev"),
+     sigma_kkz = attr(mod_summary$varcor$kkz, "stddev")
+   ) %>%
+     write.csv(paste0(
+       "est/correlates_of_rent/neighborhood/",
+       "model_info",
+       analysis,
+       ".csv"
+     ))
+   
+   model_coefficients <- cbind(
+     data.frame(
+       model_id = analysis,
+       x_names = names(fixef(mod)),
+       x_means = apply(X, 2, mean),
+       x_min = apply(X, 2, min),
+       x_max = apply(X, 2, max),
+       b = fixef(mod)
+     ),
+     as.data.frame(as.matrix(vcov(mod)))
+   ) %>%
+     write.csv(
+       paste0(
+         "est/correlates_of_rent/neighborhood/",
+         "model_coefficients",
+         analysis,
+         ".csv"
+       )
+     )
+   
+   ## Clear memory
+   rm(mod, X, mod_summary)
+   gc(reset = TRUE)
+   
+   ## Progress
+   print(paste0(
+     "Analysis: ",
+     analysis,
+     ". Time elapsed: ",
+     round((proc.time()[3] - ptm) / 60, 2),
+     " minutes."
+   ))
+ }
[1] "Analysis: 1. Time elapsed: 0.07 minutes."
[1] "Analysis: 2. Time elapsed: 0.14 minutes."

> ## ---- Collect output ----
> model_files <- list.files("est/correlates_of_rent/neighborhood")

> model_info_files <- model_files[startsWith(model_files, "model_info")]

> model_coefficients_files <-
+   model_files[startsWith(model_files, "model_coefficients")]

> ## Model info
> model_info <- data.frame()

> for (file in model_info_files) {
+   model_info <- model_info %>%
+     bind_rows(read.csv(paste0(
+       "est/correlates_of_rent/neighborhood/", file
+     )))
+ }

> model_info <- model_info %>%
+   dplyr::select(-X) %>%
+   arrange(model_id)

> write.csv(model_info,
+           file = "est/info_correlates_of_rent_nbh.csv",
+           row.names = FALSE)

> ## Model coefficients
> model_coefficients <- data.frame()

> for (file in model_coefficients_files) {
+   model_coefficients <- model_coefficients %>%
+     bind_rows(read.csv(paste0(
+       "est/correlates_of_rent/neighborhood/", file
+     )))
+ }

> model_coefficients <- model_coefficients %>%
+   dplyr::select(-X) %>%
+   arrange(model_id)

> write.csv(model_coefficients,
+           file = "est/coef_correlates_of_rent_nbh.csv",
+           row.names = FALSE)

> save(model_info, model_coefficients, file = "est/correlates_of_rent_nbh.rdata")
